]> git.neil.brown.name Git - wiggle.git/commitdiff
Automatically enable --non-space on large files.
author NeilBrown <neil@brown.name>
Sat, 29 Aug 2020 08:29:38 +0000 (18:29 +1000)
committer NeilBrown <neil@brown.name>
Sat, 29 Aug 2020 08:33:41 +0000 (18:33 +1000)
If a file has more than 50,000 words, switch on --non-space to hopefully
reduce the number of words, and hence the running time.

Signed-off-by: NeilBrown <neil@brown.name>
wiggle.1
wiggle.c

index 89047cb1303e925944a6c4a164e8a7f081453ce8..a0a62e295c560fe920283a19330993d4de92e86b 100644 (file)
--- a/wiggle.1
+++ b/wiggle.1
@@ -147,7 +147,9 @@ default for the "diff" function.
 .BR \-\-non\-space
 Request that words be defined as sequences of non-white-space.  Without
 this flag words are sequences of alphanumerics or single non-white-space
-characters.
+characters.  This flag is automatically enabled if
+.I wiggle
+needs to compare two files which both have more than 50,000 words.
 .TP
 .BR \-l ", " \-\-lines
 Request that all operations and display be line based.
index 2347ffe9a65c03a8c7a898d0c9a6b5465970c3fb..ca0b2602e16161ca3826a4510c6a65d0e3f90558 100644 (file)
--- a/wiggle.c
+++ b/wiggle.c
@@ -430,6 +430,14 @@ static int do_diff(int argc, char *argv[], int obj, int ispatch,
        }
        fl[0] = split_stream(flist[0], obj);
        fl[1] = split_stream(flist[1], obj);
+       if (!(obj & WholeWord) && fl[0].elcnt > 50000 && fl[1].elcnt > 50000) {
+               /* Too big - use fewer words if possible */
+               free(fl[0].list);
+               free(fl[1].list);
+               obj |= WholeWord;
+               fl[0] = split_stream(flist[0], obj);
+               fl[1] = split_stream(flist[1], obj);
+       }
        if (chunks2 && !chunks1)
                csl = pdiff(fl[0], fl[1], chunks2);
        else
@@ -568,6 +576,18 @@ static int do_merge(int argc, char *argv[], int obj, int blanks,
        fl[0] = split_stream(flist[0], blanks);
        fl[1] = split_stream(flist[1], blanks);
        fl[2] = split_stream(flist[2], blanks);
+       if (!(blanks & WholeWord) &&
+           fl[1].elcnt > 50000 &&
+           (fl[0].elcnt > 50000 || fl[2].elcnt > 50000)) {
+               /* Too many words */
+               free(fl[0].list);
+               free(fl[1].list);
+               free(fl[2].list);
+               blanks |= WholeWord;
+               fl[0] = split_stream(flist[0], blanks);
+               fl[1] = split_stream(flist[1], blanks);
+               fl[2] = split_stream(flist[2], blanks);
+       }
 
        if (chunks2 && !chunks1)
                csl1 = pdiff(fl[0], fl[1], chunks2);